In [1]:
from util import *
from glob import glob
import matplotlib.pyplot as plt
from shapely import wkt
pd.set_option("display.max_columns", None)
In [2]:
gdf = gpd.read_file(f"Manuwatu_Test/transects_intersects_20240315_120828.geojson")
gdf["Date"] = pd.to_datetime(gdf.ShorelineID, dayfirst=True, format='mixed')
gdf["Year"] = gdf.Date.dt.year
gdf["YearsSinceBase"] = (gdf.Date - pd.Timestamp(1800, 1, 1)).dt.days / 365.25
gdf["YearsUntilFuture"] = (
    pd.Timestamp(2100, 1, 1) - gdf.Date
    ).dt.days / 365.25
gdf.Date = gdf.Date.astype(str)
gdf
Out[2]:
TransectID ShorelineID BaselineID Distance IntersectX IntersectY Uncertainty geometry Date Year YearsSinceBase YearsUntilFuture
0 1 05/12/2020 0 -30.72 1.664823e+06 5.652036e+06 10.00 POINT (1664822.752 5652035.621) 2020-12-05 2020 220.922656 79.071869
1 1 11/02/2017 0 -29.69 1.664822e+06 5.652035e+06 10.00 POINT (1664821.739 5652035.474) 2017-02-11 2017 217.108830 82.885695
2 1 08/11/1970 0 -27.86 1.664820e+06 5.652035e+06 2.63 POINT (1664819.922 5652035.211) 1970-11-08 1970 170.847365 129.147159
3 1 19/09/1955 0 -25.63 1.664818e+06 5.652035e+06 2.68 POINT (1664817.715 5652034.890) 1955-09-19 1955 155.709788 144.284736
4 2 05/12/2020 0 -31.94 1.664830e+06 5.651936e+06 10.00 POINT (1664830.066 5651936.243) 2020-12-05 2020 220.922656 79.071869
... ... ... ... ... ... ... ... ... ... ... ... ...
9214 1925 23/01/2016 244 -39.52 1.904293e+06 5.511354e+06 1.02 POINT (1904293.154 5511353.967) 2016-01-23 2016 216.054757 83.939767
9215 1925 31/03/1944 244 -29.47 1.904294e+06 5.511344e+06 2.27 POINT (1904293.505 5511343.923) 1944-03-31 1944 144.240931 155.753593
9216 1926 29/04/2021 244 -36.22 1.904393e+06 5.511358e+06 1.02 POINT (1904392.891 5511358.332) 2021-04-29 2021 221.319644 78.674880
9217 1926 23/01/2016 244 -36.58 1.904393e+06 5.511359e+06 1.02 POINT (1904392.878 5511358.698) 2016-01-23 2016 216.054757 83.939767
9218 1926 31/03/1944 244 -24.95 1.904393e+06 5.511347e+06 2.27 POINT (1904393.306 5511347.071) 1944-03-31 1944 144.240931 155.753593

9219 rows × 12 columns

In [3]:
lines = gpd.read_file("Manuwatu_Test/t10_transects_azi.shp")
lines
Out[3]:
ObjectID_1 BaselineID TransOrder TransEdit Azimuth geometry
0 1.0 0.0 1.0 0.0 81.743293 LINESTRING Z (1664792.355 5652031.210 0.000, 1...
1 2.0 0.0 2.0 0.0 81.593561 LINESTRING Z (1664798.470 5651931.574 0.000, 1...
2 3.0 0.0 3.0 0.0 80.678646 LINESTRING Z (1664800.863 5651831.613 0.000, 1...
3 4.0 0.0 4.0 0.0 80.416187 LINESTRING Z (1664809.739 5651732.147 0.000, 1...
4 5.0 0.0 5.0 0.0 77.954781 LINESTRING Z (1664832.184 5651634.801 0.000, 1...
... ... ... ... ... ... ...
1889 1924.0 244.0 1924.0 0.0 358.475284 LINESTRING Z (1904194.624 5511311.003 0.000, 1...
1890 1925.0 244.0 1925.0 0.0 357.996994 LINESTRING Z (1904294.535 5511314.467 0.000, 1...
1891 1926.0 244.0 1926.0 0.0 357.892961 LINESTRING Z (1904394.223 5511322.139 0.000, 1...
1892 1927.0 245.0 1927.0 0.0 343.686148 LINESTRING Z (1787772.153 5517489.901 0.000, 1...
1893 1928.0 245.0 1928.0 0.0 343.686148 LINESTRING Z (1787867.064 5517520.834 0.000, 1...

1894 rows × 6 columns

In [4]:
def get_azimuth(line):
    p1, p2 = line.coords[1:]
    azimuth = math.degrees(math.atan2(p2[0]-p1[0], p2[1]-p1[1]))
    if azimuth < 0:
        azimuth += 360
    return azimuth
lines["calculated_azimuth"] = lines.geometry.apply(get_azimuth)
lines
Out[4]:
ObjectID_1 BaselineID TransOrder TransEdit Azimuth geometry calculated_azimuth
0 1.0 0.0 1.0 0.0 81.743293 LINESTRING Z (1664792.355 5652031.210 0.000, 1... 81.743293
1 2.0 0.0 2.0 0.0 81.593561 LINESTRING Z (1664798.470 5651931.574 0.000, 1... 81.593561
2 3.0 0.0 3.0 0.0 80.678646 LINESTRING Z (1664800.863 5651831.613 0.000, 1... 80.678646
3 4.0 0.0 4.0 0.0 80.416187 LINESTRING Z (1664809.739 5651732.147 0.000, 1... 80.416187
4 5.0 0.0 5.0 0.0 77.954781 LINESTRING Z (1664832.184 5651634.801 0.000, 1... 77.954781
... ... ... ... ... ... ... ...
1889 1924.0 244.0 1924.0 0.0 358.475284 LINESTRING Z (1904194.624 5511311.003 0.000, 1... 358.475284
1890 1925.0 244.0 1925.0 0.0 357.996994 LINESTRING Z (1904294.535 5511314.467 0.000, 1... 357.996994
1891 1926.0 244.0 1926.0 0.0 357.892961 LINESTRING Z (1904394.223 5511322.139 0.000, 1... 357.892961
1892 1927.0 245.0 1927.0 0.0 343.686148 LINESTRING Z (1787772.153 5517489.901 0.000, 1... 343.686148
1893 1928.0 245.0 1928.0 0.0 343.686148 LINESTRING Z (1787867.064 5517520.834 0.000, 1... 343.686148

1894 rows × 7 columns

In [5]:
gdf.Year.describe()
Out[5]:
count    9219.000000
mean     1990.560581
std        28.088717
min      1939.000000
25%      1967.000000
50%      1995.000000
75%      2017.000000
max      2022.000000
Name: Year, dtype: float64
In [6]:
gdf.Distance.describe()
Out[6]:
count    9219.000000
mean      -61.999667
std        69.108880
min     -1914.190000
25%       -74.220000
50%       -39.960000
75%       -25.395000
max        -4.770000
Name: Distance, dtype: float64
In [7]:
transect_metadata = get_transect_metadata(f"Manuwatu_Test/t10_transects_azi.shp")
In [8]:
#win_type='gaussian' allows weighting to occur in the rolling slope function
#####BUG IN THE CODE THAT DOES NOT ALLOW WEIGHTING TO OCCUR#####
linear_models = fit(gdf, transect_metadata)
rolled_slopes = linear_models.groupby("group").slope.rolling(10, min_periods=1, win_type="gaussian").mean().dropna().reset_index(level=0)
linear_models.slope = rolled_slopes.slope
linear_models.dropna(inplace=True)
linear_models
c:\Users\lalit\anaconda3\envs\environment\lib\site-packages\sklearn\metrics\_regression.py:996: UndefinedMetricWarning: R^2 score is not well-defined with less than two samples.
  warnings.warn(msg, UndefinedMetricWarning)
c:\Users\lalit\anaconda3\envs\environment\lib\site-packages\sklearn\metrics\_regression.py:996: UndefinedMetricWarning: R^2 score is not well-defined with less than two samples.
  warnings.warn(msg, UndefinedMetricWarning)
c:\Users\lalit\anaconda3\envs\environment\lib\site-packages\sklearn\metrics\_regression.py:996: UndefinedMetricWarning: R^2 score is not well-defined with less than two samples.
  warnings.warn(msg, UndefinedMetricWarning)
Out[8]:
TransectID slope intercept group r2_score mae mse rmse
0 1 -0.065571 -15.941371 0 0.926099 0.504336 0.276915 0.526227
1 2 -0.078177 -11.259434 0 0.957393 0.507380 0.296034 0.544090
2 3 -0.070524 -15.562882 0 0.589842 1.023301 1.711207 1.308131
3 4 -0.066901 -15.276985 0 0.686399 0.931125 1.157774 1.075999
4 5 -0.069303 -13.214268 0 0.760956 1.230886 1.578735 1.256477
... ... ... ... ... ... ... ... ...
1885 1923 0.225420 -28.613747 163 0.026739 1.182115 1.962700 1.400964
1886 1924 0.106620 -16.956773 163 0.999680 0.052252 0.003835 0.061926
1887 1925 0.026290 -10.756660 163 0.986967 0.443378 0.276111 0.525462
1888 1926 -0.012694 -2.926398 163 0.992207 0.402196 0.227201 0.476656
1889 1928 -1.901856 -199.126821 164 0.996629 0.849264 0.882216 0.939263

1887 rows × 8 columns

In [9]:
def calculate_new_coordinates(old_x, old_y, bearing, distance):
    bearing_radians = math.radians(bearing)
    new_x = old_x + (distance * math.sin(bearing_radians))
    new_y = old_y + (distance * math.cos(bearing_radians))
    point = Point(new_x, new_y)
    assert not point.is_empty
    return point

def predict(
    df: pd.DataFrame,
    linear_models: pd.DataFrame,
    transect_metadata: dict,
):
    """_summary_

    Args:
        df (pd.DataFrame): dataframe with columns: TransectID, Date, Distance, YearsSinceBase
        linear_models (pd.DataFrame): dataframe with columns: TransectID, slope, intercept
        transect_metadata (dict): dict lookup of TransectID to Azimuth & group
        
    Returns:
        pd.DataFrame: resulting prediction points for the year 2100
    """
    results = []
    for i, row in linear_models.iterrows():
        transect_ID = row.TransectID
        transect_df = df[df.TransectID == transect_ID]
        latest_row = transect_df[transect_df.Date == transect_df["Date"].max()].iloc[0]
        future_year = int(row.get("FUTURE_YEAR", FUTURE_YEAR))
        result = row.to_dict()
        result.update({
            "TransectID": transect_ID,
            "BaselineID": latest_row.BaselineID,
            "group": row.group,
            "Year": future_year,
            "ocean_point": calculate_new_coordinates(
                latest_row.geometry.x,
                latest_row.geometry.y,
                transect_metadata[transect_ID]["Azimuth"] + 180,
                500,
            ),
        })
        
        model = "linear"
        slope = row.slope
        intercept = row.intercept

        predicted_distance = slope * (future_year - 1800) + intercept
        distance_difference = latest_row.Distance - predicted_distance
        result[f"{model}_model_point"] = calculate_new_coordinates(
            latest_row.geometry.x,
            latest_row.geometry.y,
            transect_metadata[transect_ID]["Azimuth"],
            distance_difference,
        )
        result[f"{model}_model_predicted_distance"] = predicted_distance
        result[f"{model}_model_distance"] = distance_difference
        results.append(result)
    results = gpd.GeoDataFrame(results)
    return results
In [10]:
results = predict(gdf, linear_models, transect_metadata)
results
Out[10]:
TransectID slope intercept group r2_score mae mse rmse BaselineID Year ocean_point linear_model_point linear_model_predicted_distance linear_model_distance
0 1.0 -0.065571 -15.941371 0.0 0.926099 0.504336 0.276915 0.526227 0 2100 POINT (1664327.9351324248 5651963.817190449) POINT (1664827.5942540085 5652036.324033046) -35.612543 4.892543
1 2.0 -0.078177 -11.259434 0.0 0.957393 0.507380 0.296034 0.544090 0 2100 POINT (1664335.437879246 5651863.146308652) POINT (1664832.8086359142 5651936.648743532) -34.712590 2.772590
2 3.0 -0.070524 -15.562882 0.0 0.589842 1.023301 1.711207 1.308131 0 2100 POINT (1664335.297331601 5651755.195818301) POINT (1664837.1025794188 5651837.561631945) -36.720043 8.520043
3 4.0 -0.066901 -15.276985 0.0 0.686399 0.931125 1.157774 1.075999 0 2100 POINT (1664344.5869323318 5651653.607125445) POINT (1664844.5968896376 5651738.032181877) -35.347317 7.087317
4 5.0 -0.069303 -13.214268 0.0 0.760956 1.230886 1.578735 1.256477 0 2100 POINT (1664374.3334289133 5651537.103741099) POINT (1664865.4424388874 5651641.897360339) -34.005075 2.165075
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1882 1923.0 0.225420 -28.613747 163.0 0.026739 1.182115 1.962700 1.400964 244 2100 POINT (1904103.7288165868 5510835.704927893) POINT (1904095.4086911152 5511271.11324593) 39.012195 -64.512195
1883 1924.0 0.106620 -16.956773 163.0 0.999680 0.052252 0.003835 0.061926 244 2100 POINT (1904206.894713303 5510849.992895648) POINT (1904195.0237154006 5511295.975836613) 15.029098 -53.859098
1884 1925.0 0.026290 -10.756660 163.0 0.986967 0.443378 0.276111 0.525462 244 2100 POINT (1904310.650852537 5510853.672411901) POINT (1904294.4349141892 5511317.338654199) -2.869718 -36.050282
1885 1926.0 -0.012694 -2.926398 163.0 0.992207 0.402196 0.227201 0.476656 244 2100 POINT (1904411.2744729214 5510858.670062316) POINT (1904393.9753056369 5511328.866490144) -6.734550 -29.485450
1886 1928.0 -1.901856 -199.126821 164.0 0.996629 0.849264 0.882216 0.939263 245 2100 POINT (1787833.198058164 5517636.541552375) POINT (1787650.8600291626 5518259.52981814) -769.683668 149.123668

1887 rows × 14 columns

In [11]:
results.set_geometry("linear_model_point", inplace=True, crs=2193)
results
Out[11]:
TransectID slope intercept group r2_score mae mse rmse BaselineID Year ocean_point linear_model_point linear_model_predicted_distance linear_model_distance
0 1.0 -0.065571 -15.941371 0.0 0.926099 0.504336 0.276915 0.526227 0 2100 POINT (1664327.9351324248 5651963.817190449) POINT (1664827.594 5652036.324) -35.612543 4.892543
1 2.0 -0.078177 -11.259434 0.0 0.957393 0.507380 0.296034 0.544090 0 2100 POINT (1664335.437879246 5651863.146308652) POINT (1664832.809 5651936.649) -34.712590 2.772590
2 3.0 -0.070524 -15.562882 0.0 0.589842 1.023301 1.711207 1.308131 0 2100 POINT (1664335.297331601 5651755.195818301) POINT (1664837.103 5651837.562) -36.720043 8.520043
3 4.0 -0.066901 -15.276985 0.0 0.686399 0.931125 1.157774 1.075999 0 2100 POINT (1664344.5869323318 5651653.607125445) POINT (1664844.597 5651738.032) -35.347317 7.087317
4 5.0 -0.069303 -13.214268 0.0 0.760956 1.230886 1.578735 1.256477 0 2100 POINT (1664374.3334289133 5651537.103741099) POINT (1664865.442 5651641.897) -34.005075 2.165075
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1882 1923.0 0.225420 -28.613747 163.0 0.026739 1.182115 1.962700 1.400964 244 2100 POINT (1904103.7288165868 5510835.704927893) POINT (1904095.409 5511271.113) 39.012195 -64.512195
1883 1924.0 0.106620 -16.956773 163.0 0.999680 0.052252 0.003835 0.061926 244 2100 POINT (1904206.894713303 5510849.992895648) POINT (1904195.024 5511295.976) 15.029098 -53.859098
1884 1925.0 0.026290 -10.756660 163.0 0.986967 0.443378 0.276111 0.525462 244 2100 POINT (1904310.650852537 5510853.672411901) POINT (1904294.435 5511317.339) -2.869718 -36.050282
1885 1926.0 -0.012694 -2.926398 163.0 0.992207 0.402196 0.227201 0.476656 244 2100 POINT (1904411.2744729214 5510858.670062316) POINT (1904393.975 5511328.866) -6.734550 -29.485450
1886 1928.0 -1.901856 -199.126821 164.0 0.996629 0.849264 0.882216 0.939263 245 2100 POINT (1787833.198058164 5517636.541552375) POINT (1787650.860 5518259.530) -769.683668 149.123668

1887 rows × 14 columns

In [12]:
poly = prediction_results_to_polygon(results)
output_shapefile = "Manuwatu_Test/weighted_projection_output.shp"
poly.to_file(output_shapefile, driver="ESRI Shapefile")
In [13]:
m = poly.explore(tiles="Esri.WorldImagery")
gpd.GeoDataFrame(results.drop(columns=["ocean_point", "linear_model_point"]), geometry=results.linear_model_point).explore(m=m)
gdf.explore("Year", legend=True, m=m)
Out[13]:
Make this Notebook Trusted to load map: File -> Trust Notebook